'''
爬取生物科技新闻
'''
from apr13.pcUtils import *
import time

# Crawl 10 listing pages of the biotech-news tag and save each article's
# body text to its own file. Relies on the project-local Scrawler helper
# (apr13.pcUtils) for fetching and CSS-selector extraction.
s = Scrawler()
for page in range(1, 11):
    url = "https://www.jiemian.com/tags/2066/{}.html".format(page)
    # 1. Titles of every news item on this listing page.
    #    (Currently unused downstream, but kept: the fetch itself is part
    #    of the original behavior.)
    titles = s.getTexts(url, selector="h3 a")
    # 1.1 Detail-page links for every news item.
    newurls = s.getUrl(url, selector="h3 a")
    print(newurls)
    # 1.2 Fetch each article's body paragraphs and dump them to a file.
    for index, newsurl in enumerate(newurls):
        contents = s.getTexts(newsurl, selector="div.article-main div.article-content p")
        print("这条新闻内容就搞定了.....HAPPY")
        # 'w' is sufficient for a write-only dump (was 'w+'); explicit
        # UTF-8 so the Chinese text is written correctly regardless of the
        # platform's default encoding (Windows would otherwise use gbk).
        with open("第{}页".format(page) + "第{}条.txt".format(index), 'w', encoding='utf-8') as f:
            f.write(str(contents))
    # Polite crawl delay between listing pages. The original slept 2000
    # seconds (~33 min per page — a seconds/milliseconds mix-up); 2 s is
    # the evident intent.
    time.sleep(2)